1 /**
2  * Hash Map
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 
8 module containers.hashmap;
9 
10 private import core.lifetime : move;
11 private import containers.internal.hash;
12 private import containers.internal.node : shouldAddGCRange;
13 private import std.experimental.allocator.mallocator : Mallocator;
14 private import std.traits : isBasicType, Unqual;
15 
16 /**
17  * Associative array / hash map.
18  * Params:
19  *     K = the key type
20  *     V = the value type
21  *     Allocator = the allocator type to use. Defaults to `Mallocator`
22  *     hashFunction = the hash function to use on the keys
23  *     supportGC = true if the container should support holding references to
24  *         GC-allocated memory.
25  */
26 struct HashMap(K, V, Allocator = Mallocator, alias hashFunction = generateHash!K,
27 	bool supportGC = shouldAddGCRange!K || shouldAddGCRange!V,
28 	bool storeHash = true)
29 {
30 	this(this) @disable;
31 
32 	private import std.experimental.allocator.common : stateSize;
33 
34 	static if (stateSize!Allocator != 0)
35 	{
36 		this() @disable;
37 
38 		/**
39 		 * Use the given `allocator` for allocations.
40 		 */
41 		this(Allocator allocator) pure nothrow @nogc @safe
42 		in
43 		{
44 			assert(allocator !is null, "Allocator must not be null");
45 		}
46 		do
47 		{
48 			this.allocator = allocator;
49 		}
50 
51 		/**
52 		 * Constructs an HashMap with an initial bucket count of bucketCount. bucketCount
53 		 * must be a power of two.
54 		 */
55 		this(size_t bucketCount, Allocator allocator)
56 		in
57 		{
58 			assert(allocator !is null, "Allocator must not be null");
59 			assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
60 		}
61 		do
62 		{
63 			this.allocator = allocator;
64 			initialize(bucketCount);
65 		}
66 
67 		invariant
68 		{
69 			assert(allocator !is null, "Allocator must not be null");
70 		}
71 	}
72 	else
73 	{
74 		/**
75 		 * Constructs an HashMap with an initial bucket count of bucketCount. bucketCount
76 		 * must be a power of two.
77 		 */
78 		this(size_t bucketCount)
79 		in
80 		{
81 			assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
82 		}
83 		do
84 		{
85 			initialize(bucketCount);
86 		}
87 	}
88 
89 	~this() nothrow
90 	{
91 		scope (failure) assert(false, "HashMap destructor threw an exception");
92 		clear();
93 	}
94 
95 	/**
96 	 * Removes all items from the map
97 	 */
98 	void clear()
99 	{
100 		import std.experimental.allocator : dispose;
101 
102 		// always remove ranges from GC first before disposing of buckets, to
103 		// prevent segfaults when the GC collects at an unfortunate time
104 		static if (useGC)
105 			GC.removeRange(buckets.ptr);
106 		allocator.dispose(buckets);
107 
108 		buckets = null;
109 		_length = 0;
110 	}
111 
112 	/**
113 	 * Supports `aa[key]` syntax.
114 	 */
115 	ref opIndex(this This)(K key)
116 	{
117 		import std.conv : text;
118 		import std.exception : enforce;
119 
120 		alias CET = ContainerElementType!(This, V, true);
121 		size_t i;
122 		auto n = find(key, i);
123 		enforce(n !is null, "'" ~ text(key) ~ "' not found in HashMap");
124 		return *cast(CET*) &n.value;
125 	}
126 
127 	/**
128 	 * Returns: `true` if there is an entry in this map for the given `key`,
129 	 *     false otherwise.
130 	 */
131 	bool containsKey(this This)(K key) inout
132 	{
133 		size_t i;
134 		return find(key, i) !is null;
135 	}
136 
137 	/**
138 	 * Gets the value for the given key, or returns `defaultValue` if the given
139 	 * key is not present.
140 	 *
141 	 * Params:
142 	 *     key = the key to look up
143 	 *     value = the default value
144 	 * Returns: the value indexed by `key`, if present, or `defaultValue` otherwise.
145 	 */
146 	auto get(this This)(K key, lazy V defaultValue)
147 	{
148 		alias CET = ContainerElementType!(This, V);
149 
150 		size_t i;
151 		auto n = find(key, i);
152 		if (n is null)
153 			return defaultValue;
154 		return cast(CET) n.value;
155 	}
156 
157 	/**
158 	 * If the given key does not exist in the HashMap, adds it with
159 	 * the value `defaultValue`.
160 	 *
161 	 * Params:
162 	 *     key = the key to look up
163 	 *     value = the default value
164 	 * Returns: a pointer to the value stored in the HashMap with the given key.
165 	 *     The pointer is guaranteed to be valid only until the next HashMap
166 	 *     modification.
167 	 */
168 	auto getOrAdd(this This)(K key, lazy V defaultValue)
169 	{
170 		alias CET = ContainerElementType!(This, V);
171 
172 		size_t i;
173 		auto n = find(key, i);
174 		if (n is null)
175 			return cast(CET*) &insert(key, defaultValue).value;
176 		else
177 			return cast(CET*) &n.value;
178 	}
179 
180 	/**
181 	 * Supports $(B aa[key] = value;) syntax.
182 	 */
183 	void opIndexAssign(V value, const K key)
184 	{
185 		insert(key, move(mutable(value)));
186 	}
187 
188 	/**
189 	 * Supports $(B key in aa) syntax.
190 	 *
191 	 * Returns: pointer to the value corresponding to the given key,
192 	 * or null if the key is not present in the HashMap.
193 	 */
194 	inout(V)* opBinaryRight(string op)(const K key) inout nothrow @trusted if (op == "in")
195 	{
196 		size_t i;
197 		auto n = find(key, i);
198 		if (n is null)
199 			return null;
200 		return &(cast(inout) n).value;
201 	}
202 
203 	/**
204 	 * Removes the value associated with the given key
205 	 * Returns: true if a value was actually removed.
206 	 */
207 	bool remove(K key)
208 	{
209 		size_t i;
210 		auto n = find(key, i);
211 		if (n is null)
212 			return false;
213 		static if (storeHash)
214 			auto node = Node(n.hash, n.key);
215 		else
216 			auto node = Node(n.key);
217 		immutable bool removed = buckets[i].remove(node);
218 		if (removed)
219 			_length--;
220 		return removed;
221 	}
222 
223 	/**
224 	 * Returns: the number of key/value pairs in this container.
225 	 */
226 	size_t length() const nothrow pure @property @safe @nogc
227 	{
228 		return _length;
229 	}
230 
231 	/**
232 	 * Returns: `true` if there are no items in this container.
233 	 */
234 	bool empty() const nothrow pure @property @safe @nogc
235 	{
236 		return _length == 0;
237 	}
238 
239 	/**
240 	 * Returns: a range of the keys in this map.
241 	 */
242 	auto byKey(this This)() inout @trusted
243 	{
244 		return MapRange!(This, IterType.key)(cast(Unqual!(This)*) &this);
245 	}
246 
247 	/**
248 	 * Returns: a GC-allocated array filled with the keys contained in this map.
249 	 */
250 	K[] keys() const @property
251 	out(result)
252 	{
253 		assert (result.length == _length, "Length mismatch");
254 	}
255 	do
256 	{
257 		import std.array : appender;
258 		auto app = appender!(K[])();
259 		foreach (ref const bucket; buckets)
260 		{
261 			foreach (ref item; bucket)
262 				app.put(cast(K) item.key);
263 		}
264 		return app.data;
265 	}
266 
267 
268 	/**
269 	 * Returns: a range of the values in this map.
270 	 */
271 	auto byValue(this This)() inout @trusted
272 	{
273 		return MapRange!(This, IterType.value)(cast(Unqual!(This)*) &this);
274 	}
275 
276 	/// ditto
277 	alias opSlice = byValue;
278 
279 	/**
280 	 * Returns: a GC-allocated array containing the values contained in this map.
281 	 */
282 	auto values(this This)() const @property
283 	out(result)
284 	{
285 		assert (result.length == _length, "Length mismatch");
286 	}
287 	do
288 	{
289 		import std.array : appender;
290 		auto app = appender!(ContainerElementType!(This, V)[])();
291 		foreach (ref const bucket; buckets)
292 		{
293 			foreach (item; bucket)
294 				app.put(cast(ContainerElementType!(This, V)) item.value);
295 		}
296 		return app.data;
297 	}
298 
299 	/**
300 	 * Returns: a range of the kev/value pairs in this map. The element type of
301 	 *     this range is a struct with `key` and `value` fields.
302 	 */
303 	auto byKeyValue(this This)() inout @trusted
304 	{
305 		return MapRange!(This, IterType.both)(cast(Unqual!(This)*) &this);
306 	}
307 
308 	/**
309 	 * Support for $(D foreach(key, value; aa) { ... }) syntax;
310 	 */
311 	int opApply(int delegate(const ref K, ref V) del)
312 	{
313 		int result = 0;
314 		foreach (ref bucket; buckets)
315 			foreach (ref node; bucket[])
316 				if ((result = del(*cast(K*)&node.key, *cast(V*)&node.value)) != 0)
317 					return result;
318 		return result;
319 	}
320 
321 	/// ditto
322 	int opApply(int delegate(const ref K, const ref V) del) const
323 	{
324 		int result = 0;
325 		foreach (const ref bucket; buckets)
326 			foreach (const ref node; bucket[])
327 				if ((result = del(*cast(K*)&node.key, *cast(V*)&node.value)) != 0)
328 					return result;
329 		return result;
330 	}
331 
332 	/// ditto
333 	int opApply(int delegate(ref V) del)
334 	{
335 		int result = 0;
336 		foreach (ref bucket; buckets)
337 			foreach (ref node; bucket[])
338 				if ((result = del(*cast(V*)&node.value)) != 0)
339 					return result;
340 		return result;
341 	}
342 
343 	/// ditto
344 	int opApply(int delegate(const ref V) del) const
345 	{
346 		int result = 0;
347 		foreach (const ref bucket; buckets)
348 			foreach (const ref node; bucket[])
349 				if ((result = del(*cast(V*)&node.value)) != 0)
350 					return result;
351 		return result;
352 	}
353 
354 	mixin AllocatorState!Allocator;
355 
356 private:
357 
358 	import std.experimental.allocator : make, makeArray;
359 	import containers.unrolledlist : UnrolledList;
360 	import containers.internal.storage_type : ContainerStorageType;
361 	import containers.internal.element_type : ContainerElementType;
362 	import containers.internal.mixins : AllocatorState;
363 	import core.memory : GC;
364 
365 	enum bool useGC = supportGC && (shouldAddGCRange!K || shouldAddGCRange!V);
366 	alias Hash = typeof({ K k = void; return hashFunction(k); }());
367 
368 	static ref ContainerStorageType!T mutable(T)(ref T value) { return *cast(ContainerStorageType!T*)&value; }
369 
370 	enum IterType: ubyte
371 	{
372 		key, value, both
373 	}
374 
375 	static struct MapRange(MapType, IterType Type)
376 	{
377 		static if (Type == IterType.both)
378 		{
379 			struct FrontType
380 			{
381 				ContainerElementType!(MapType, K) key;
382 				ContainerElementType!(MapType, V) value;
383 			}
384 		}
385 		else static if (Type == IterType.value)
386 			alias FrontType = ContainerElementType!(MapType, V);
387 		else static if (Type == IterType.key)
388 			alias FrontType = ContainerElementType!(MapType, K);
389 		else
390 			static assert(false);
391 
392 		FrontType front()
393 		{
394 			static if (Type == IterType.both)
395 				return FrontType(cast(ContainerElementType!(MapType, K)) bucketRange.front.key,
396 					cast(ContainerElementType!(MapType, V)) bucketRange.front.value);
397 			else static if (Type == IterType.value)
398 				return cast(ContainerElementType!(MapType, V)) bucketRange.front.value;
399 			else static if (Type == IterType.key)
400 				return cast(ContainerElementType!(MapType, K)) bucketRange.front.key;
401 			else
402 				static assert(false);
403 		}
404 
405 		bool empty() const pure nothrow @nogc @property
406 		{
407 			return _empty;
408 		}
409 
410 		void popFront() pure nothrow @nogc
411 		{
412 			bucketRange.popFront();
413 			if (bucketRange.empty)
414 			{
415 				while (bucketRange.empty)
416 				{
417 					bucketIndex++;
418 					if (bucketIndex >= hm.buckets.length)
419 					{
420 						_empty = true;
421 						break;
422 					}
423 					else
424 						bucketRange = hm.buckets[bucketIndex][];
425 				}
426 			}
427 		}
428 
429 	private:
430 
431 		this(Unqual!(MapType)* hm)
432 		{
433 			this.hm = hm;
434 			this.bucketIndex = 0;
435 			bucketRange = typeof(bucketRange).init;
436 			this._empty = false;
437 
438 			while (true)
439 			{
440 				if (bucketIndex >= hm.buckets.length)
441 				{
442 					_empty = true;
443 					break;
444 				}
445 				bucketRange = hm.buckets[bucketIndex][];
446 				if (bucketRange.empty)
447 					bucketIndex++;
448 				else
449 					break;
450 			}
451 		}
452 
453 		Unqual!(MapType)* hm;
454 		size_t bucketIndex;
455 		typeof(hm.buckets[0].opSlice()) bucketRange;
456 		bool _empty;
457 	}
458 
459 	void initialize(size_t bucketCount = DEFAULT_BUCKET_COUNT)
460 	{
461 		import std.conv : emplace;
462 		assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
463 
464 		buckets = makeArray!Bucket(allocator, bucketCount);
465 		static if (useGC)
466 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
467 		foreach (ref bucket; buckets)
468 		{
469 			static if (stateSize!Allocator == 0)
470 				emplace(&bucket);
471 			else
472 				emplace(&bucket, allocator);
473 		}
474 	}
475 
476 	Node* insert(const K key, V value)
477 	{
478 		return insert(key, move(mutable(value)), hashFunction(key));
479 	}
480 
481 	Node* insert(const K key, V value, const Hash hash, const bool modifyLength = true)
482 	{
483 		if (buckets.length == 0)
484 			initialize();
485 		immutable size_t index = hashToIndex(hash, buckets.length);
486 		foreach (ref item; buckets[index])
487 		{
488 			if (item.hash == hash && item.key == key)
489 			{
490 				item.value = move(mutable(value));
491 				return &item;
492 			}
493 		}
494 		static if (storeHash)
495 			Node node = Node(hash, cast(ContainerStorageType!K) key, move(mutable(value)));
496 		else
497 			Node node = Node(cast(ContainerStorageType!K) key, move(mutable(value)));
498 		Node* n = buckets[index].insertAnywhere(move(node));
499 		if (modifyLength)
500 			_length++;
501 		if (shouldRehash())
502 		{
503 			rehash();
504 			immutable newIndex = hashToIndex(hash, buckets.length);
505 			foreach (ref item; buckets[newIndex])
506 			{
507 				if (item.hash == hash && item.key == key)
508 					return &item;
509 			}
510 			assert(false, "Inserted item not found after rehash");
511 		}
512 		else
513 			return n;
514 	}
515 
516 	/**
517 	 * Returns: true if the load factor has been exceeded
518 	 */
519 	bool shouldRehash() const pure nothrow @safe @nogc
520 	{
521 		// We let this be greater than one because each bucket is an unrolled
522 		// list that has more than one element per linked list node.
523 		return (float(_length) / float(buckets.length)) > 1.33f;
524 	}
525 
526 	/**
527 	 * Rehash the map.
528 	 */
529 	void rehash() @trusted
530 	{
531 		import std.conv : emplace;
532 		immutable size_t newLength = buckets.length << 1;
533 		immutable size_t newSize = newLength * Bucket.sizeof;
534 		Bucket[] oldBuckets = buckets;
535 		buckets = cast(Bucket[]) allocator.allocate(newSize);
536 		if (newLength)
537 			assert (buckets, "Bucket reallocation failed");
538 		static if (useGC)
539 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
540 		assert (buckets.length == newLength, "Bucket reallocation length mismatch");
541 		foreach (ref bucket; buckets)
542 		{
543 			static if (stateSize!Allocator == 0)
544 				emplace(&bucket);
545 			else
546 				emplace(&bucket, allocator);
547 		}
548 
549 		foreach (ref bucket; oldBuckets)
550 		{
551 			foreach (ref node; bucket)
552 				insert(cast(K) node.key, move(node.value), node.hash, false);
553 			typeid(typeof(bucket)).destroy(&bucket);
554 		}
555 		static if (useGC)
556 			GC.removeRange(oldBuckets.ptr);
557 		allocator.deallocate(cast(void[]) oldBuckets);
558 	}
559 
560 	inout(Node)* find(const K key, ref size_t index) inout
561 	{
562 		return find(key, index, hashFunction(key));
563 	}
564 
565 	inout(Node)* find(const K key, ref size_t index, const Hash hash) inout
566 	{
567 		import std.array : empty;
568 
569 		if (buckets.empty)
570 			return null;
571 		index = hashToIndex(hash, buckets.length);
572 		foreach (ref r; buckets[index])
573 		{
574 			if (r.hash == hash && r.key == key)
575 				return cast(inout(Node)*) &r;
576 		}
577 		return null;
578 	}
579 
580 	struct Node
581 	{
582 		bool opEquals(ref const K key) const
583 		{
584 			return key == this.key;
585 		}
586 
587 		bool opEquals(ref const Node n) const
588 		{
589 			return this.hash == n.hash && this.key == n.key;
590 		}
591 
592 		static if (storeHash)
593 			Hash hash;
594 		else
595 			@property Hash hash() const { return hashFunction(key); }
596 
597 		ContainerStorageType!K key;
598 		ContainerStorageType!V value;
599 	}
600 
601 	alias Bucket = UnrolledList!(Node, Allocator, useGC);
602 	Bucket[] buckets;
603 	size_t _length;
604 }
605 
606 ///
607 unittest
608 {
609 	import std.uuid : randomUUID;
610 	import std.range.primitives : walkLength;
611 
612 	auto hm = HashMap!(string, int)(16);
613 	assert (hm.length == 0);
614 	assert (!hm.remove("abc"));
615 	hm["answer"] = 42;
616 	assert (hm.length == 1);
617 	assert ("answer" in hm);
618 	assert (hm.containsKey("answer"));
619 	hm.remove("answer");
620 	assert (hm.length == 0);
621 	assert ("answer" !in hm);
622 	assert (hm.get("answer", 1000) == 1000);
623 	assert (!hm.containsKey("answer"));
624 	hm["one"] = 1;
625 	hm["one"] = 1;
626 	assert (hm.length == 1);
627 	assert (hm["one"] == 1);
628 	hm["one"] = 2;
629 	assert(hm["one"] == 2);
630 	foreach (i; 0 .. 1000)
631 	{
632 		hm[randomUUID().toString] = i;
633 	}
634 	assert (hm.length == 1001);
635 	assert (hm.keys().length == hm.length);
636 	assert (hm.values().length == hm.length);
637 	() @nogc {
638 		assert (hm.byKey().walkLength == hm.length);
639 		assert (hm.byValue().walkLength == hm.length);
640 		assert (hm[].walkLength == hm.length);
641 		assert (hm.byKeyValue().walkLength == hm.length);
642 	}();
643 	foreach (v; hm) {}
644 
645 	auto hm2 = HashMap!(char, char)(4);
646 	hm2['a'] = 'a';
647 
648 	HashMap!(int, int) hm3;
649 	assert (hm3.get(100, 20) == 20);
650 	hm3[100] = 1;
651 	assert (hm3.get(100, 20) == 1);
652 	auto pValue = 100 in hm3;
653 	assert(*pValue == 1);
654 }
655 
656 version(emsi_containers_unittest) unittest
657 {
658 	static class Foo
659 	{
660 		string name;
661 	}
662 
663 	void someFunc(const scope ref HashMap!(string,Foo) map) @safe
664 	{
665 		foreach (kv; map.byKeyValue())
666 		{
667 			assert (kv.key == "foo");
668 			assert (kv.value.name == "Foo");
669 		}
670 	}
671 
672 	auto hm = HashMap!(string, Foo)(16);
673 	auto f = new Foo;
674 	f.name = "Foo";
675 	hm.insert("foo", f);
676 	assert("foo" in hm);
677 }
678 
679 // Issue #54
680 version(emsi_containers_unittest) unittest
681 {
682 	HashMap!(string, int) map;
683 	map.insert("foo", 0);
684 	map.insert("bar", 0);
685 
686 	foreach (key; map.keys())
687 		map[key] = 1;
688 	foreach (key; map.byKey())
689 		map[key] = 1;
690 
691 	foreach (value; map.byValue())
692 		assert(value == 1);
693 	foreach (value; map.values())
694 		assert(value == 1);
695 }
696 
697 version(emsi_containers_unittest) unittest
698 {
699 	HashMap!(int, int, Mallocator, (int i) => i) map;
700 	auto p = map.getOrAdd(1, 1);
701 	assert(*p == 1);
702 	*p = 2;
703 	assert(map[1] == 2);
704 }
705 
706 debug (EMSI_CONTAINERS) version(emsi_containers_unittest) unittest
707 {
708 	import std.uuid : randomUUID;
709 	import std.algorithm.iteration : walkLength;
710 	import std.stdio;
711 
712 	auto hm = HashMap!(string, int)(16);
713 	foreach (i; 0 .. 1_000_000)
714 	{
715 		auto str = randomUUID().toString;
716 		//writeln("Inserting ", str);
717 		hm[str] = i;
718 		//if (i > 0 && i % 100 == 0)
719 			//writeln(i);
720 	}
721 	writeln(hm.buckets.length);
722 
723 	import std.algorithm.sorting:sort;
724 	ulong[ulong] counts;
725 	foreach (i, ref bucket; hm.buckets[])
726 		counts[bucket.length]++;
727 	foreach (k; counts.keys.sort())
728 		writeln(k, "=>", counts[k]);
729 }
730 
731 // #74
732 version(emsi_containers_unittest) unittest
733 {
734 	HashMap!(string, size_t) aa;
735 	aa["b"] = 0;
736 	++aa["b"];
737 	assert(aa["b"] == 1);
738 }
739 
740 // storeHash == false
741 version(emsi_containers_unittest) unittest
742 {
743 	static struct S { size_t v; }
744 	HashMap!(S, S, Mallocator, (S s) { return s.v; }, false, false) aa;
745 	static assert(aa.Node.sizeof == 2 * S.sizeof);
746 }
747 
748 version(emsi_containers_unittest) unittest
749 {
750 	auto hm = HashMap!(string, int)(16);
751 
752 	foreach (v; hm) {}
753 	foreach (ref v; hm) {}
754 	foreach (int v; hm) {}
755 	foreach (ref int v; hm) {}
756 	foreach (const ref int v; hm) {}
757 
758 	foreach (k, v; hm) {}
759 	foreach (k, ref v; hm) {}
760 	foreach (k, int v; hm) {}
761 	foreach (k, ref int v; hm) {}
762 	foreach (k, const ref int v; hm) {}
763 
764 	foreach (ref k, v; hm) {}
765 	foreach (ref k, ref v; hm) {}
766 	foreach (ref k, int v; hm) {}
767 	foreach (ref k, ref int v; hm) {}
768 	foreach (ref k, const ref int v; hm) {}
769 
770 	foreach (const string k, v; hm) {}
771 	foreach (const string k, ref v; hm) {}
772 	foreach (const string k, int v; hm) {}
773 	foreach (const string k, ref int v; hm) {}
774 	foreach (const string k, const ref int v; hm) {}
775 
776 	foreach (const ref string k, v; hm) {}
777 	foreach (const ref string k, ref v; hm) {}
778 	foreach (const ref string k, int v; hm) {}
779 	foreach (const ref string k, ref int v; hm) {}
780 	foreach (const ref string k, const ref int v; hm) {}
781 
782 	hm["a"] = 1;
783 	foreach (k, ref v; hm) { v++; }
784 	assert(hm["a"] == 2);
785 }
786 
787 version(emsi_containers_unittest) unittest
788 {
789 	static struct S { @disable this(this); }
790 	alias HM = HashMap!(int, S);
791 }
792 
793 version(emsi_containers_unittest) unittest
794 {
795 	struct S { int* a; }
796 	alias HM = HashMap!(S, int);
797 }